## load required libraries
library(tidyverse)
library(quanteda)
library(lexicon)
library(reshape2)
library(stringi)
library(quanteda.textplots)
library(gridExtra)

## clean workspace
## (removes all objects from the global environment)
rm(list = ls())

## set working directory (WD)
path <- "~/coliphi21/practice_lessons/lesson_1/src/"
setwd(path)

## check that WD is set correctly
getwd()
## [1] "/Users/lucienbaumgartner/coliphi21"
For this tutorial you can either work with your own data, or the pre-built corpora provided in the /input-folder for the first practice session. The quanteda-package also contains pre-built corpora you can use. For this session, I will use the quanteda-corpus data_corpus_inaugural containing the inaugural addresses of US presidents since 1789. If you work with your own data or our other pre-built corpora, this vignette might be helpful.
## use the pre-built quanteda corpus of US presidential inaugural addresses
df <- data_corpus_inaugural

## what does the corpus object look like?
df
## Corpus consisting of 59 documents and 4 docvars.
## 1789-Washington :
## "Fellow-Citizens of the Senate and of the House of Representa..."
##
## 1793-Washington :
## "Fellow citizens, I am again called upon by the voice of my c..."
##
## 1797-Adams :
## "When it was first perceived, in early times, that no middle ..."
##
## 1801-Jefferson :
## "Friends and Fellow Citizens: Called upon to undertake the du..."
##
## 1805-Jefferson :
## "Proceeding, fellow citizens, to that qualification which the..."
##
## 1809-Madison :
## "Unwilling to depart from examples of the most revered author..."
##
## [ reached max_ndoc ... 53 more documents ]
## summary statistics (first rows only)
summary(df) %>% head()

## what class is the object?
class(df)
## [1] "corpus" "character"

## how much space does it use?
object.size(df)
## 838568 bytes

## what does the data structure look like?
str(df)
## 'corpus' Named chr [1:59] "Fellow-Citizens of the Senate and of the House of Representatives:\n\nAmong the vicissitudes incident to life n"| __truncated__ "Fellow citizens, I am again called upon by the voice of my country to execute the functions of its Chief Magist"| __truncated__ "When it was first perceived, in early times, that no middle course for America remained between unlimited submi"| __truncated__ "Friends and Fellow Citizens:\n\nCalled upon to undertake the duties of the first executive office of our countr"| __truncated__ "Proceeding, fellow citizens, to that qualification which the Constitution requires before my entrance on the ch"| __truncated__ "Unwilling to depart from examples of the most revered authority, I avail myself of the occasion now presented t"| __truncated__ "About to add the solemnity of an oath to the obligations imposed by a second call to the station in which my co"| __truncated__ ...
## - attr(*, "names")= chr [1:59] "1789-Washington" "1793-Washington" "1797-Adams" "1801-Jefferson" ...
## - attr(*, "docvars")='data.frame': 59 obs. of 7 variables:
## ..$ docname_ : chr [1:59] "1789-Washington" "1793-Washington" "1797-Adams" "1801-Jefferson" ...
## ..$ docid_ : Factor w/ 59 levels "1789-Washington",..: 1 2 3 4 5 6 7 8 9 10 ...
## ..$ segid_ : int [1:59] 1 1 1 1 1 1 1 1 1 1 ...
## ..$ Year : int [1:59] 1789 1793 1797 1801 1805 1809 1813 1817 1821 1825 ...
## ..$ President: chr [1:59] "Washington" "Washington" "Adams" "Jefferson" ...
## ..$ FirstName: chr [1:59] "George" "George" "John" "Thomas" ...
## ..$ Party : Factor w/ 6 levels "Democratic","Democratic-Republican",..: 4 4 3 2 2 2 2 2 2 2 ...
## - attr(*, "meta")=List of 3
## ..$ system:List of 5
## .. ..$ package-version:Classes 'package_version', 'numeric_version' hidden list of 1
## .. .. ..$ : int [1:3] 2 1 2
## .. ..$ r-version :Classes 'R_system_version', 'package_version', 'numeric_version' hidden list of 1
## .. .. ..$ : int [1:3] 4 0 3
## .. ..$ system : Named chr [1:3] "Darwin" "x86_64" "kbenoit"
## .. .. ..- attr(*, "names")= chr [1:3] "sysname" "machine" "user"
## .. ..$ directory : chr "/Users/kbenoit/Dropbox (Personal)/GitHub/quanteda/quanteda"
## .. ..$ created : Date[1:1], format: "2021-01-25"
## ..$ object:List of 2
## .. ..$ unit : chr "documents"
## .. ..$ summary:List of 2
## .. .. ..$ hash: chr(0)
## .. .. ..$ data: NULL
## ..$ user :List of 6
## .. ..$ description: chr "Transcripts of all inaugural addresses delivered by United States Presidents, from Washington 1789 onward. Dat"| __truncated__
## .. ..$ source : chr "Gerhard Peters and John T. Woolley. The American Presidency Project."
## .. ..$ url : chr "https://www.presidency.ucsb.edu/documents/presidential-documents-archive-guidebook/inaugural-addresses"
## .. ..$ author : chr "(various US Presidents)"
## .. ..$ keywords : chr [1:5] "political" "US politics" "United States" "presidents" ...
## .. ..$ title : chr "US presidential inaugural address speeches"
## the document-level variables (first rows only)
docvars(df) %>% head()

## text data: how can we look at Biden's 2021 speech?
## first convert the corpus to a named character vector, one element per document
txt <- as.character(df)
names(txt)
## [1] "1789-Washington" "1793-Washington" "1797-Adams" "1801-Jefferson" "1805-Jefferson" "1809-Madison" "1813-Madison" "1817-Monroe" "1821-Monroe" "1825-Adams" "1829-Jackson" "1833-Jackson" "1837-VanBuren" "1841-Harrison" "1845-Polk" "1849-Taylor" "1853-Pierce" "1857-Buchanan" "1861-Lincoln" "1865-Lincoln" "1869-Grant" "1873-Grant" "1877-Hayes" "1881-Garfield" "1885-Cleveland" "1889-Harrison" "1893-Cleveland" "1897-McKinley" "1901-McKinley" "1905-Roosevelt" "1909-Taft" "1913-Wilson" "1917-Wilson" "1921-Harding" "1925-Coolidge" "1929-Hoover" "1933-Roosevelt" "1937-Roosevelt" "1941-Roosevelt" "1945-Roosevelt" "1949-Truman" "1953-Eisenhower" "1957-Eisenhower" "1961-Kennedy" "1965-Johnson" "1969-Nixon" "1973-Nixon" "1977-Carter" "1981-Reagan" "1985-Reagan" "1989-Bush" "1993-Clinton" "1997-Clinton" "2001-Bush" "2005-Bush"
## [56] "2009-Obama" "2013-Obama" "2017-Trump" "2021-Biden.txt"
## select by pattern rather than by exact name: the document is named
## "2021-Biden.txt", so a lookup with "2021-Biden" would not find it
biden <- txt[grepl("Biden", names(txt))]

## print the raw text of the speech
cat(biden)
## Chief Justice Roberts, Vice President Harris, Speaker Pelosi, Leader Schumer, Leader McConnell, Vice President Pence, distinguished guests, and my fellow Americans.
##
## This is America's day.
##
## This is democracy's day.
##
## A day of history and hope.
##
## Of renewal and resolve.
##
## Through a crucible for the ages America has been tested anew and America has risen to the challenge.
##
## Today, we celebrate the triumph not of a candidate, but of a cause, the cause of democracy.
##
## The will of the people has been heard and the will of the people has been heeded.
##
## We have learned again that democracy is precious.
##
## Democracy is fragile.
##
## And at this hour, my friends, democracy has prevailed.
##
## So now, on this hallowed ground where just days ago violence sought to shake this Capitol's very foundation, we come together as one nation, under God, indivisible, to carry out the peaceful transfer of power as we have for more than two centuries.
##
## We look ahead in our uniquely American way – restless, bold, optimistic – and set our sights on the nation we know we can be and we must be.
##
## I thank my predecessors of both parties for their presence here.
##
## I thank them from the bottom of my heart.
##
## You know the resilience of our Constitution and the strength of our nation.
##
## As does President Carter, who I spoke to last night but who cannot be with us today, but whom we salute for his lifetime of service.
##
## I have just taken the sacred oath each of these patriots took — an oath first sworn by George Washington.
##
## But the American story depends not on any one of us, not on some of us, but on all of us.
##
## On "We the People" who seek a more perfect Union.
##
## This is a great nation and we are a good people.
##
## Over the centuries through storm and strife, in peace and in war, we have come so far. But we still have far to go.
##
## We will press forward with speed and urgency, for we have much to do in this winter of peril and possibility.
##
## Much to repair.
##
## Much to restore.
##
## Much to heal.
##
## Much to build.
##
## And much to gain.
##
## Few periods in our nation's history have been more challenging or difficult than the one we're in now.
##
## A once-in-a-century virus silently stalks the country.
##
## It's taken as many lives in one year as America lost in all of World War II.
##
## Millions of jobs have been lost.
##
## Hundreds of thousands of businesses closed.
##
## A cry for racial justice some 400 years in the making moves us. The dream of justice for all will be deferred no longer.
##
## A cry for survival comes from the planet itself. A cry that can't be any more desperate or any more clear.
##
## And now, a rise in political extremism, white supremacy, domestic terrorism that we must confront and we will defeat.
##
## To overcome these challenges – to restore the soul and to secure the future of America – requires more than words.
##
## It requires that most elusive of things in a democracy:
##
## Unity.
##
## Unity.
##
## In another January in Washington, on New Year's Day 1863, Abraham Lincoln signed the Emancipation Proclamation.
##
## When he put pen to paper, the President said, "If my name ever goes down into history it will be for this act and my whole soul is in it."
##
## My whole soul is in it.
##
## Today, on this January day, my whole soul is in this:
##
## Bringing America together.
##
## Uniting our people.
##
## And uniting our nation.
##
## I ask every American to join me in this cause.
##
## Uniting to fight the common foes we face:
##
## Anger, resentment, hatred.
##
## Extremism, lawlessness, violence.
##
## Disease, joblessness, hopelessness.
##
## With unity we can do great things. Important things.
##
## We can right wrongs.
##
## We can put people to work in good jobs.
##
## We can teach our children in safe schools.
##
## We can overcome this deadly virus.
##
## We can reward work, rebuild the middle class, and make health care
##
## secure for all.
##
## We can deliver racial justice.
##
## We can make America, once again, the leading force for good in the world.
##
## I know speaking of unity can sound to some like a foolish fantasy.
##
## I know the forces that divide us are deep and they are real.
##
## But I also know they are not new.
##
## Our history has been a constant struggle between the American ideal that we are all created equal and the harsh, ugly reality that racism, nativism, fear, and demonization have long torn us apart.
##
## The battle is perennial.
##
## Victory is never assured.
##
## Through the Civil War, the Great Depression, World War, 9/11, through struggle, sacrifice, and setbacks, our "better angels" have always prevailed.
##
## In each of these moments, enough of us came together to carry all of us forward.
##
## And, we can do so now.
##
## History, faith, and reason show the way, the way of unity.
##
## We can see each other not as adversaries but as neighbors.
##
## We can treat each other with dignity and respect.
##
## We can join forces, stop the shouting, and lower the temperature.
##
## For without unity, there is no peace, only bitterness and fury.
##
## No progress, only exhausting outrage.
##
## No nation, only a state of chaos.
##
## This is our historic moment of crisis and challenge, and unity is the path forward.
##
## And, we must meet this moment as the United States of America.
##
## If we do that, I guarantee you, we will not fail.
##
## We have never, ever, ever failed in America when we have acted together.
##
## And so today, at this time and in this place, let us start afresh.
##
## All of us.
##
## Let us listen to one another.
##
## Hear one another.
##
## See one another.
##
## Show respect to one another.
##
## Politics need not be a raging fire destroying everything in its path.
##
## Every disagreement doesn't have to be a cause for total war.
##
## And, we must reject a culture in which facts themselves are manipulated and even manufactured.
##
## My fellow Americans, we have to be different than this.
##
## America has to be better than this.
##
## And, I believe America is better than this.
##
## Just look around.
##
## Here we stand, in the shadow of a Capitol dome that was completed amid the Civil War, when the Union itself hung in the balance.
##
## Yet we endured and we prevailed.
##
## Here we stand looking out to the great Mall where Dr. King spoke of his dream.
##
## Here we stand, where 108 years ago at another inaugural, thousands of protestors tried to block brave women from marching for the right to vote.
##
## Today, we mark the swearing-in of the first woman in American history elected to national office – Vice President Kamala Harris.
##
## Don't tell me things can't change.
##
## Here we stand across the Potomac from Arlington National Cemetery, where heroes who gave the last full measure of devotion rest in eternal peace.
##
## And here we stand, just days after a riotous mob thought they could use violence to silence the will of the people, to stop the work of our democracy, and to drive us from this sacred ground.
##
## That did not happen.
##
## It will never happen.
##
## Not today.
##
## Not tomorrow.
##
## Not ever.
##
## To all those who supported our campaign I am humbled by the faith you have placed in us.
##
## To all those who did not support us, let me say this: Hear me out as we move forward. Take a measure of me and my heart.
##
## And if you still disagree, so be it.
##
## That's democracy. That's America. The right to dissent peaceably, within the guardrails of our Republic, is perhaps our nation's greatest strength.
##
## Yet hear me clearly: Disagreement must not lead to disunion.
##
## And I pledge this to you: I will be a President for all Americans.
##
## I will fight as hard for those who did not support me as for those who did.
##
## Many centuries ago, Saint Augustine, a saint of my church, wrote that a people was a multitude defined by the common objects of their love.
##
## What are the common objects we love that define us as Americans?
##
## I think I know.
##
## Opportunity.
##
## Security.
##
## Liberty.
##
## Dignity.
##
## Respect.
##
## Honor.
##
## And, yes, the truth.
##
## Recent weeks and months have taught us a painful lesson.
##
## There is truth and there are lies.
##
## Lies told for power and for profit.
##
## And each of us has a duty and responsibility, as citizens, as Americans, and especially as leaders – leaders who have pledged to honor our Constitution and protect our nation — to defend the truth and to defeat the lies.
##
## I understand that many Americans view the future with some fear and trepidation.
##
## I understand they worry about their jobs, about taking care of their families, about what comes next.
##
## I get it.
##
## But the answer is not to turn inward, to retreat into competing factions, distrusting those who don't look like you do, or worship the way you do, or don't get their news from the same sources you do.
##
## We must end this uncivil war that pits red against blue, rural versus urban, conservative versus liberal.
##
## We can do this if we open our souls instead of hardening our hearts.
##
## If we show a little tolerance and humility.
##
## If we're willing to stand in the other person's shoes just for a moment.
##
## Because here is the thing about life: There is no accounting for what fate will deal you.
##
## There are some days when we need a hand.
##
## There are other days when we're called on to lend one.
##
## That is how we must be with one another.
##
## And, if we are this way, our country will be stronger, more prosperous, more ready for the future.
##
## My fellow Americans, in the work ahead of us, we will need each other.
##
## We will need all our strength to persevere through this dark winter.
##
## We are entering what may well be the toughest and deadliest period of the virus.
##
## We must set aside the politics and finally face this pandemic as one nation.
##
## I promise you this: as the Bible says weeping may endure for a night but joy cometh in the morning.
##
## We will get through this, together
##
## The world is watching today.
##
## So here is my message to those beyond our borders: America has been tested and we have come out stronger for it.
##
## We will repair our alliances and engage with the world once again.
##
## Not to meet yesterday's challenges, but today's and tomorrow's.
##
## We will lead not merely by the example of our power but by the power of our example.
##
## We will be a strong and trusted partner for peace, progress, and security.
##
## We have been through so much in this nation.
##
## And, in my first act as President, I would like to ask you to join me in a moment of silent prayer to remember all those we lost this past year to the pandemic.
##
## To those 400,000 fellow Americans – mothers and fathers, husbands and wives, sons and daughters, friends, neighbors, and co-workers.
##
## We will honor them by becoming the people and nation we know we can and should be.
##
## Let us say a silent prayer for those who lost their lives, for those they left behind, and for our country.
##
## Amen.
##
## This is a time of testing.
##
## We face an attack on democracy and on truth.
##
## A raging virus.
##
## Growing inequity.
##
## The sting of systemic racism.
##
## A climate in crisis.
##
## America's role in the world.
##
## Any one of these would be enough to challenge us in profound ways.
##
## But the fact is we face them all at once, presenting this nation with the gravest of responsibilities.
##
## Now we must step up.
##
## All of us.
##
## It is a time for boldness, for there is so much to do.
##
## And, this is certain.
##
## We will be judged, you and I, for how we resolve the cascading crises of our era.
##
## Will we rise to the occasion?
##
## Will we master this rare and difficult hour?
##
## Will we meet our obligations and pass along a new and better world for our children?
##
## I believe we must and I believe we will.
##
## And when we do, we will write the next chapter in the American story.
##
## It's a story that might sound something like a song that means a lot to me.
##
## It's called "American Anthem" and there is one verse stands out for me:
##
## "The work and prayers
##
## of centuries have brought us to this day
##
## What shall be our legacy?
##
## What will our children say?…
##
## Let me know in my heart
##
## When my days are through
##
## America
##
## America
##
## I gave my best to you."
##
## Let us add our own work and prayers to the unfolding story of our nation.
##
## If we do this then when our days are through our children and our children's children will say of us they gave their best.
##
## They did their duty.
##
## They healed a broken land.
##
## My fellow Americans, I close today where I began, with a sacred oath.
##
## Before God and all of you I give you my word.
##
## I will always level with you.
##
## I will defend the Constitution.
##
## I will defend our democracy.
##
## I will defend America.
##
## I will give my all in your service thinking not of power, but of possibilities.
##
## Not of personal interest, but of the public good.
##
## And together, we shall write an American story of hope, not fear.
##
## Of unity, not division.
##
## Of light, not darkness.
##
## An American story of decency and dignity.
##
## Of love and of healing.
##
## Of greatness and of goodness.
##
## May this be the story that guides us.
##
## The story that inspires us.
##
## The story that tells ages yet to come that we answered the call of history.
##
## We met the moment.
##
## That democracy and hope, truth and justice, did not die on our watch but thrived.
##
## That our America secured liberty at home and stood once again as a beacon to the world.
##
## That is what we owe our forebearers, one another, and generations to follow.
##
## So, with purpose and resolve we turn to the tasks of our time.
##
## Sustained by faith.
##
## Driven by conviction.
##
## And, devoted to one another and to this country we love with all our hearts.
##
## May God bless America and may God protect our troops.
##
## Thank you, America.
## select Washington's 1789 speech to compare
cat(txt["1789-Washington"])
## Fellow-Citizens of the Senate and of the House of Representatives:
##
## Among the vicissitudes incident to life no event could have filled me with greater anxieties than that of which the notification was transmitted by your order, and received on the 14th day of the present month. On the one hand, I was summoned by my Country, whose voice I can never hear but with veneration and love, from a retreat which I had chosen with the fondest predilection, and, in my flattering hopes, with an immutable decision, as the asylum of my declining years - a retreat which was rendered every day more necessary as well as more dear to me by the addition of habit to inclination, and of frequent interruptions in my health to the gradual waste committed on it by time. On the other hand, the magnitude and difficulty of the trust to which the voice of my country called me, being sufficient to awaken in the wisest and most experienced of her citizens a distrustful scrutiny into his qualifications, could not but overwhelm with despondence one who (inheriting inferior endowments from nature and unpracticed in the duties of civil administration) ought to be peculiarly conscious of his own deficiencies. In this conflict of emotions all I dare aver is that it has been my faithful study to collect my duty from a just appreciation of every circumstance by which it might be affected. All I dare hope is that if, in executing this task, I have been too much swayed by a grateful remembrance of former instances, or by an affectionate sensibility to this transcendent proof of the confidence of my fellow citizens, and have thence too little consulted my incapacity as well as disinclination for the weighty and untried cares before me, my error will be palliated by the motives which mislead me, and its consequences be judged by my country with some share of the partiality in which they originated.
##
## Such being the impressions under which I have, in obedience to the public summons, repaired to the present station, it would be peculiarly improper to omit in this first official act my fervent supplications to that Almighty Being who rules over the universe, who presides in the councils of nations, and whose providential aids can supply every human defect, that His benediction may consecrate to the liberties and happiness of the people of the United States a Government instituted by themselves for these essential purposes, and may enable every instrument employed in its administration to execute with success the functions allotted to his charge. In tendering this homage to the Great Author of every public and private good, I assure myself that it expresses your sentiments not less than my own, nor those of my fellow citizens at large less than either. No people can be bound to acknowledge and adore the Invisible Hand which conducts the affairs of men more than those of the United States. Every step by which they have advanced to the character of an independent nation seems to have been distinguished by some token of providential agency; and in the important revolution just accomplished in the system of their united government the tranquil deliberations and voluntary consent of so many distinct communities from which the event has resulted can not be compared with the means by which most governments have been established without some return of pious gratitude, along with an humble anticipation of the future blessings which the past seem to presage. These reflections, arising out of the present crisis, have forced themselves too strongly on my mind to be suppressed. You will join with me, I trust, in thinking that there are none under the influence of which the proceedings of a new and free government can more auspiciously commence.
##
## By the article establishing the executive department it is made the duty of the President "to recommend to your consideration such measures as he shall judge necessary and expedient." The circumstances under which I now meet you will acquit me from entering into that subject further than to refer to the great constitutional charter under which you are assembled, and which, in defining your powers, designates the objects to which your attention is to be given. It will be more consistent with those circumstances, and far more congenial with the feelings which actuate me, to substitute, in place of a recommendation of particular measures, the tribute that is due to the talents, the rectitude, and the patriotism which adorn the characters selected to devise and adopt them. In these honorable qualifications I behold the surest pledges that as on one side no local prejudices or attachments, no separate views nor party animosities, will misdirect the comprehensive and equal eye which ought to watch over this great assemblage of communities and interests, so, on another, that the foundation of our national policy will be laid in the pure and immutable principles of private morality, and the preeminence of free government be exemplified by all the attributes which can win the affections of its citizens and command the respect of the world. 
## I dwell on this prospect with every satisfaction which an ardent love for my country can inspire, since there is no truth more thoroughly established than that there exists in the economy and course of nature an indissoluble union between virtue and happiness; between duty and advantage; between the genuine maxims of an honest and magnanimous policy and the solid rewards of public prosperity and felicity; since we ought to be no less persuaded that the propitious smiles of Heaven can never be expected on a nation that disregards the eternal rules of order and right which Heaven itself has ordained; and since the preservation of the sacred fire of liberty and the destiny of the republican model of government are justly considered, perhaps, as deeply, as finally, staked on the experiment entrusted to the hands of the American people.
##
## Besides the ordinary objects submitted to your care, it will remain with your judgment to decide how far an exercise of the occasional power delegated by the fifth article of the Constitution is rendered expedient at the present juncture by the nature of objections which have been urged against the system, or by the degree of inquietude which has given birth to them. Instead of undertaking particular recommendations on this subject, in which I could be guided by no lights derived from official opportunities, I shall again give way to my entire confidence in your discernment and pursuit of the public good; for I assure myself that whilst you carefully avoid every alteration which might endanger the benefits of an united and effective government, or which ought to await the future lessons of experience, a reverence for the characteristic rights of freemen and a regard for the public harmony will sufficiently influence your deliberations on the question how far the former can be impregnably fortified or the latter be safely and advantageously promoted.
##
## To the foregoing observations I have one to add, which will be most properly addressed to the House of Representatives. It concerns myself, and will therefore be as brief as possible. When I was first honored with a call into the service of my country, then on the eve of an arduous struggle for its liberties, the light in which I contemplated my duty required that I should renounce every pecuniary compensation. From this resolution I have in no instance departed; and being still under the impressions which produced it, I must decline as inapplicable to myself any share in the personal emoluments which may be indispensably included in a permanent provision for the executive department, and must accordingly pray that the pecuniary estimates for the station in which I am placed may during my continuance in it be limited to such actual expenditures as the public good may be thought to require.
##
## Having thus imparted to you my sentiments as they have been awakened by the occasion which brings us together, I shall take my present leave; but not without resorting once more to the benign Parent of the Human Race in humble supplication that, since He has been pleased to favor the American people with opportunities for deliberating in perfect tranquillity, and dispositions for deciding with unparalleled unanimity on a form of government for the security of their union and the advancement of their happiness, so His divine blessing may be equally conspicuous in the enlarged views, the temperate consultations, and the wise measures on which the success of this Government must depend.
## word tokenization
## (`?tokens` opens the help page for tokens(); only useful interactively)
?tokens

## split every document into word tokens, dropping punctuation and symbols;
## padding = FALSE means removed tokens leave no empty placeholder behind.
## TRUE/FALSE are spelled out because T and F are ordinary variables that
## can be reassigned, whereas TRUE and FALSE are reserved words.
toks <- tokens(
  df,
  remove_punct = TRUE,
  remove_symbols = TRUE,
  padding = FALSE
)

## inspect the tokens object
toks
## Tokens consisting of 59 documents and 4 docvars.
## 1789-Washington :
## [1] "Fellow-Citizens" "of" "the" "Senate" "and" "of" "the" "House" "of" "Representatives" "Among" "the"
## [ ... and 1,418 more ]
##
## 1793-Washington :
## [1] "Fellow" "citizens" "I" "am" "again" "called" "upon" "by" "the" "voice" "of" "my"
## [ ... and 123 more ]
##
## 1797-Adams :
## [1] "When" "it" "was" "first" "perceived" "in" "early" "times" "that" "no" "middle" "course"
## [ ... and 2,306 more ]
##
## 1801-Jefferson :
## [1] "Friends" "and" "Fellow" "Citizens" "Called" "upon" "to" "undertake" "the" "duties" "of" "the"
## [ ... and 1,714 more ]
##
## 1805-Jefferson :
## [1] "Proceeding" "fellow" "citizens" "to" "that" "qualification" "which" "the" "Constitution" "requires" "before" "my"
## [ ... and 2,154 more ]
##
## 1809-Madison :
## [1] "Unwilling" "to" "depart" "from" "examples" "of" "the" "most" "revered" "authority" "I" "avail"
## [ ... and 1,163 more ]
##
## [ reached max_ndoc ... 53 more documents ]
## document-term matrix (quanteda calls it a document-feature matrix, "dfm"):
## one row per document, one column per feature, cells hold the counts
dfx <- dfm(toks)

## inspect the matrix; note the features are lower-cased in the output below
dfx
## Document-feature matrix of: 59 documents, 9,422 features (91.89% sparse) and 4 docvars.
## features
## docs fellow-citizens of the senate and house representatives among vicissitudes incident
## 1789-Washington 1 71 116 1 48 2 2 1 1 1
## 1793-Washington 0 11 13 0 2 0 0 0 0 0
## 1797-Adams 3 140 163 1 130 0 2 4 0 0
## 1801-Jefferson 2 104 130 0 81 0 0 1 0 0
## 1805-Jefferson 0 101 143 0 93 0 0 7 0 0
## 1809-Madison 1 69 104 0 43 0 0 0 0 0
## [ reached max_ndoc ... 53 more documents, reached max_nfeat ... 9,412 more features ]
## top 10 features for every document
## (grouping by document name yields one ranked list per speech)
topfeatures(dfx, n = 10, groups = docnames(dfx))
## $`1789-Washington`
## the of and to which in i be my by
## 116 71 48 48 36 31 23 23 22 20
##
## $`1793-Washington`
## the of i to in shall and by my it
## 13 11 6 5 3 3 2 2 2 2
##
## $`1797-Adams`
## the of and to a in it be by if
## 163 140 130 72 51 47 34 31 30 25
##
## $`1801-Jefferson`
## the of and to which that in our i a
## 130 104 81 61 25 24 24 24 21 21
##
## $`1805-Jefferson`
## the of and to that in with their them have
## 143 101 93 83 37 35 28 28 27 24
##
## $`1809-Madison`
## the of to and in a as which by i
## 104 69 61 43 34 19 15 14 11 11
##
## $`1813-Madison`
## the of and to a on our in it which
## 100 65 44 42 25 22 22 21 18 16
##
## $`1817-Monroe`
## the of to and in our a it be is
## 275 164 126 122 79 65 61 57 50 41
##
## $`1821-Monroe`
## the of to and in a which it be our
## 360 197 146 141 136 76 66 64 64 60
##
## $`1825-Adams`
## the of and to in by have that our been
## 304 245 116 101 62 38 36 36 36 29
##
## $`1829-Jackson`
## the of to and in that our a be their
## 92 71 53 49 24 21 18 16 16 16
##
## $`1833-Jackson`
## the of and to in our my a which all
## 101 76 53 46 23 19 18 15 14 14
##
## $`1837-VanBuren`
## the of and to in that our a it i
## 252 198 150 139 76 60 60 59 42 39
##
## $`1841-Harrison`
## the of to and in that a it which be
## 829 604 318 231 173 132 132 111 107 106
##
## $`1845-Polk`
## the of and to our in be a it that
## 397 298 189 184 101 87 76 65 54 47
##
## $`1849-Taylor`
## the of to and in i by be shall our
## 99 62 61 52 20 18 17 16 15 15
##
## $`1853-Pierce`
## the of and to a in be that which it
## 230 169 130 107 62 60 57 46 41 34
##
## $`1857-Buchanan`
## the of to and in a this our it is
## 238 139 105 97 61 58 39 35 32 32
##
## $`1861-Lincoln`
## the of to and in be that it a is
## 256 146 134 105 77 76 59 59 56 49
##
## $`1865-Lincoln`
## the to and of it that war all which in
## 58 27 24 22 13 12 12 10 9 9
##
## $`1869-Grant`
## the to of and in be i a it will
## 83 57 47 27 27 25 19 19 16 16
##
## $`1873-Grant`
## the of and to in i my a that be
## 106 72 50 49 26 25 21 21 20 19
##
## $`1877-Hayes`
## the of and to in a that be by as
## 240 166 102 88 63 41 39 32 26 26
##
## $`1881-Garfield`
## the of and to in is that a it our
## 317 181 119 80 49 37 35 35 35 35
##
## $`1885-Cleveland`
## the of and to in a our their is be
## 174 117 103 57 31 30 26 22 19 18
##
## $`1889-Harrison`
## the of and to in our that a not be
## 360 240 192 133 80 76 66 65 46 45
##
## $`1893-Cleveland`
## the of and to our in be which that by
## 156 119 102 79 46 36 25 23 21 21
##
## $`1897-McKinley`
## the of and to in be our a it is
## 345 228 171 113 81 65 60 57 56 46
##
## $`1901-McKinley`
## the of and to in we be it our for
## 200 110 97 65 42 28 27 26 25 23
##
## $`1905-Roosevelt`
## the of and we to in our a which have
## 65 45 38 32 28 23 22 20 16 15
##
## $`1909-Taft`
## the of and to in a be is as it
## 486 314 220 218 140 109 79 62 58 56
##
## $`1913-Wilson`
## the of and to we our in it a have
## 109 87 78 49 40 30 29 29 27 25
##
## $`1917-Wilson`
## the and of we to in our that have be
## 94 77 76 47 46 36 33 29 27 22
##
## $`1921-Harding`
## the of and to we our in for a is
## 200 159 152 104 80 68 63 52 47 47
##
## $`1925-Coolidge`
## the of and to we a in that is not
## 261 207 146 135 88 77 71 65 65 61
##
## $`1929-Hoover`
## the of and to in our a is for that
## 288 250 122 100 83 75 49 48 44 39
##
## $`1933-Roosevelt`
## the of and to in a that our we it
## 130 109 58 50 44 38 32 29 26 25
##
## $`1937-Roosevelt`
## of the to and we a that our in have
## 106 106 56 53 47 39 33 33 29 21
##
## $`1941-Roosevelt`
## the of and to in we a it is that
## 114 81 47 36 35 32 31 28 24 23
##
## $`1945-Roosevelt`
## the we of and to that our a in it
## 27 26 25 21 16 14 14 13 11 7
##
## $`1949-Truman`
## the and of to we in that a our for
## 141 100 96 81 59 56 37 36 32 30
##
## $`1953-Eisenhower`
## the of and to we in our that this a
## 171 142 101 81 66 65 58 40 37 33
##
## $`1957-Eisenhower`
## the of and we to in our all a is
## 114 96 64 51 44 43 38 26 25 20
##
## $`1961-Kennedy`
## the of to and we a in our that not
## 86 65 43 41 30 29 26 21 20 19
##
## $`1965-Johnson`
## the and of to in we a our that is
## 77 65 57 37 36 34 33 32 27 27
##
## $`1969-Nixon`
## the of to we in our that and as a
## 136 94 69 65 61 47 42 39 34 31
##
## $`1973-Nixon`
## the of to in and we a that for our
## 83 68 65 58 50 47 35 33 32 32
##
## $`1977-Carter`
## the and to we our of a for that in
## 53 48 44 43 35 33 29 24 23 22
##
## $`1981-Reagan`
## the and of to we our a in that will
## 122 92 90 80 57 56 46 45 34 33
##
## $`1985-Reagan`
## the and of to we a our in for is
## 130 110 95 73 68 59 55 46 35 33
##
## $`1989-Bush`
## the and a to of we is our in are
## 121 98 73 63 61 60 49 44 38 36
##
## $`1993-Clinton`
## the and our we to of in is for world
## 89 66 57 52 49 46 31 28 20 18
##
## $`1997-Clinton`
## the of and to our a we in new that
## 133 96 94 64 63 59 42 35 29 27
##
## $`2001-Bush`
## and of the our we a to in is not
## 82 58 53 50 47 46 45 31 31 27
##
## $`2005-Bush`
## the of and in our to we is that a
## 142 116 108 51 50 38 37 30 28 27
##
## $`2009-Obama`
## the and of to our we that a is in
## 135 111 82 70 67 62 49 47 36 25
##
## $`2013-Obama`
## the and our of we to that a for is
## 104 89 76 69 68 66 55 37 28 25
##
## $`2017-Trump`
## and the of our we will to is america a
## 77 71 48 47 46 40 36 21 18 15
##
## $`2021-Biden.txt`
## the and we of to a our in this i
## 101 96 88 77 65 46 43 42 39 33
## ugh, not very informative...
## let's remove stopwords before creating a document-term matrix
## this is done during tokenization
## print the built-in English stopword list to see which words it contains
stopwords('en')
## [1] "i" "me" "my" "myself" "we" "our" "ours" "ourselves" "you" "your" "yours" "yourself" "yourselves" "he" "him" "his" "himself" "she" "her" "hers" "herself" "it" "its" "itself" "they" "them" "their" "theirs" "themselves" "what" "which" "who" "whom" "this" "that" "these" "those" "am" "is" "are" "was" "were" "be" "been" "being" "have" "has" "had" "having" "do" "does" "did" "doing" "would" "should" "could" "ought" "i'm" "you're" "he's" "she's" "it's" "we're" "they're" "i've" "you've" "we've" "they've" "i'd" "you'd" "he'd" "she'd" "we'd" "they'd" "i'll" "you'll"
## [77] "he'll" "she'll" "we'll" "they'll" "isn't" "aren't" "wasn't" "weren't" "hasn't" "haven't" "hadn't" "doesn't" "don't" "didn't" "won't" "wouldn't" "shan't" "shouldn't" "can't" "cannot" "couldn't" "mustn't" "let's" "that's" "who's" "what's" "here's" "there's" "when's" "where's" "why's" "how's" "a" "an" "the" "and" "but" "if" "or" "because" "as" "until" "while" "of" "at" "by" "for" "with" "about" "against" "between" "into" "through" "during" "before" "after" "above" "below" "to" "from" "up" "down" "in" "out" "on" "off" "over" "under" "again" "further" "then" "once" "here" "there" "when" "where"
## [153] "why" "how" "all" "any" "both" "each" "few" "more" "most" "other" "some" "such" "no" "nor" "not" "only" "own" "same" "so" "than" "too" "very" "will"
# drop every English stopword from the tokens, then rebuild the DFM
# from what is left
sel_toks <- toks %>%
  tokens_remove(pattern = stopwords("en"))
dfx <- sel_toks %>%
  dfm()
# again: 10 features for every document, now without stopwords
topfeatures(dfx, n = 10, groups = docnames(dfx))
## $`1789-Washington`
## can every government public may present country one citizens duty
## 9 9 8 6 6 5 5 4 4 4
##
## $`1793-Washington`
## shall now oath present country voice called citizens administration confidence
## 3 2 2 1 1 1 1 1 1 1
##
## $`1797-Adams`
## people government may nations country can states nation constitution foreign
## 20 16 13 11 9 9 9 9 8 8
##
## $`1801-Jefferson`
## government us may let one shall principle man citizens fellow
## 12 10 8 7 6 6 6 6 5 5
##
## $`1805-Jefferson`
## public citizens may fellow state among shall us can time
## 14 10 10 8 8 7 7 7 6 6
##
## $`1809-Madison`
## public nations can country well states rights peace confidence united
## 6 6 5 4 4 4 4 4 3 3
##
## $`1813-Madison`
## war country every united british states nation without spirit citizens
## 15 5 5 5 5 4 4 4 4 3
##
## $`1817-Monroe`
## states government great people every us united just may union
## 21 21 21 15 14 14 13 10 10 10
##
## $`1821-Monroe`
## great states united war may made citizens every government people
## 29 20 16 16 15 15 14 13 12 11
##
## $`1825-Adams`
## union government upon rights country public great peace first nation
## 20 17 16 10 9 9 9 9 8 8
##
## $`1829-Jackson`
## public government shall can power may people national whose duties
## 8 6 6 5 5 4 4 4 3 3
##
## $`1833-Jackson`
## government people union states powers upon general may united preservation
## 13 9 9 8 5 5 5 4 4 4
##
## $`1837-VanBuren`
## every people institutions government country upon us may can never
## 20 20 16 15 13 13 12 11 9 8
##
## $`1841-Harrison`
## power people government constitution may upon one can executive states
## 47 38 36 36 34 34 26 26 25 24
##
## $`1845-Polk`
## government states union one people powers constitution country interests upon
## 45 36 32 19 16 16 15 14 14 14
##
## $`1849-Taylor`
## shall government country duties may interests constitution us congress day
## 15 7 6 5 4 4 4 4 4 3
##
## $`1853-Pierce`
## upon can power government every may shall must us states
## 24 14 11 10 9 9 9 9 9 8
##
## $`1857-Buchanan`
## states shall constitution may people government great question country public
## 22 18 17 15 13 13 11 11 9 9
##
## $`1861-Lincoln`
## can constitution people union states government shall now upon one
## 28 24 20 20 19 18 17 15 15 14
##
## $`1865-Lincoln`
## war shall god years union let may right must us
## 12 5 5 4 4 4 3 3 3 3
##
## $`1869-Grant`
## country now every public may without us laws best dollar
## 8 8 6 5 5 5 5 5 5 5
##
## $`1873-Grant`
## country people made best great office one can government good
## 8 7 6 6 5 5 4 4 4 4
##
## $`1877-Hayes`
## country government upon public states political people great party citizens
## 20 15 15 11 11 10 9 9 8 7
##
## $`1881-Garfield`
## people government states constitution can upon great union law nation
## 21 20 15 15 13 13 11 11 10 9
##
## $`1885-Cleveland`
## people government public shall constitution interests every citizens policy upon
## 16 16 11 10 8 7 5 5 5 5
##
## $`1889-Harrison`
## people upon states shall public laws may great constitution government
## 29 21 20 18 17 17 12 12 11 10
##
## $`1893-Cleveland`
## people government us can every public american support national service
## 19 13 10 9 9 8 8 7 6 6
##
## $`1897-McKinley`
## upon people government must congress great country can public every
## 31 25 23 23 18 16 14 13 13 12
##
## $`1901-McKinley`
## government people upon now united states executive congress us shall
## 13 12 11 10 9 9 9 9 8 7
##
## $`1905-Roosevelt`
## us life people must great nation problems men power cause
## 12 6 6 6 5 5 5 4 4 4
##
## $`1909-Taft`
## government business must can may upon proper congress race law
## 26 22 19 18 18 16 15 14 13 13
##
## $`1913-Wilson`
## great government life every men upon justice things nation shall
## 14 9 8 8 8 8 8 7 6 6
##
## $`1917-Wilson`
## upon shall us purpose action life world peace stand can
## 13 9 8 8 7 6 6 6 6 5
##
## $`1921-Harding`
## world must america war never civilization can new order may
## 23 23 15 13 12 12 11 11 10 10
##
## $`1925-Coolidge`
## can country must great people government world peace much upon
## 26 17 17 16 15 14 13 13 12 12
##
## $`1929-Hoover`
## government can upon progress people world must peace justice nation
## 24 17 17 16 15 15 15 15 14 12
##
## $`1933-Roosevelt`
## can national must may people shall leadership helped nation world
## 11 9 9 8 7 7 7 7 6 6
##
## $`1937-Roosevelt`
## government people can nation good men see democracy power progress
## 15 11 9 9 8 8 8 8 7 7
##
## $`1941-Roosevelt`
## nation know spirit democracy life us people america years freedom
## 11 10 9 9 8 8 7 7 6 6
##
## $`1945-Roosevelt`
## shall peace learned men today can way test life fellow
## 7 6 5 4 4 3 3 3 2 2
##
## $`1949-Truman`
## nations world can peace people freedom free united must security
## 22 22 16 14 12 12 11 10 9 9
##
## $`1953-Eisenhower`
## free world faith peace shall us people must upon freedom
## 21 14 13 12 11 11 10 10 10 10
##
## $`1957-Eisenhower`
## may nations world peace freedom people seek can must upon
## 15 14 14 11 11 10 10 9 9 6
##
## $`1961-Kennedy`
## let us can world sides new pledge citizens nations free
## 16 12 9 8 8 7 7 5 5 5
##
## $`1965-Johnson`
## us change nation must people union man world old every
## 12 12 11 10 9 9 9 7 7 6
##
## $`1969-Nixon`
## us can people world peace let know now make earth
## 20 17 14 13 12 11 10 9 9 9
##
## $`1973-Nixon`
## us let peace world new can america responsibility government great
## 26 22 19 16 15 14 13 11 10 9
##
## $`1977-Carter`
## can nation new must us people together strength spirit human
## 13 10 9 8 8 7 7 7 6 5
##
## $`1981-Reagan`
## us government must believe people americans one time world freedom
## 25 16 10 10 9 9 8 8 8 8
##
## $`1985-Reagan`
## us people world one government freedom must time now human
## 27 16 15 14 13 13 12 10 10 9
##
## $`1989-Bush`
## new us can great nation world free must hand good
## 14 13 11 10 10 10 9 9 8 8
##
## $`1993-Clinton`
## world must america us people today new let change americans
## 18 18 15 13 12 10 9 9 9 9
##
## $`1997-Clinton`
## new us century nation time every people america land one
## 29 27 20 13 12 11 11 11 11 10
##
## $`2001-Bush`
## us country citizens story nation america can every must never
## 11 9 9 9 8 8 6 6 6 5
##
## $`2005-Bush`
## freedom liberty america every one nation country world americans america's
## 25 15 12 10 9 9 8 8 8 8
##
## $`2009-Obama`
## us can nation new every must america people less let
## 23 13 12 11 8 8 8 7 7 7
##
## $`2013-Obama`
## us must people time can every together make one country
## 21 17 11 10 7 7 7 7 6 6
##
## $`2017-Trump`
## america american people country one every never great nation new
## 18 11 10 9 8 7 6 6 6 6
##
## $`2021-Biden.txt`
## us america can one nation must democracy people another american
## 27 18 16 15 12 10 10 9 9 9
# we can also compute topfeatures by any docvar
# first inspect the document variables that are available for grouping
docvars(dfx)
# then aggregate the 10 most frequent features per value of the docvar `Party`
# (passed unquoted, as a docvar name)
topfeatures(dfx, n = 10, groups = Party)
## $Democratic
## us people can government must nation world new shall every
## 222 199 173 143 138 126 118 113 111 109
##
## $`Democratic-Republican`
## government great states war may public every us union country
## 68 61 56 51 49 48 45 44 42 40
##
## $Federalist
## people government may nations country can states nation constitution foreign
## 20 16 13 11 9 9 9 9 8 8
##
## $none
## can every government may present country public shall citizens people
## 9 9 9 7 6 6 6 6 5 5
##
## $Republican
## people government can us must upon world great country peace
## 264 240 228 218 201 192 180 159 147 139
##
## $Whig
## government states people power constitution may upon union one country
## 88 61 57 57 55 51 50 47 45 42
Sometimes we want to analyze certain indicators at the sentence level. To show how to do so, we will compute the per-sentence sentiment in Biden’s 2021 speech.
## step 1: keep only the document(s) whose `President` docvar equals 'Biden'
biden <- df %>%
  corpus_subset(President == 'Biden')
## step 2: reshape the corpus from full texts to sentences
sentences <- biden %>%
  corpus_reshape(to = 'sentences')
# print the reshaped corpus
sentences
## Corpus consisting of 216 documents and 4 docvars.
## 2021-Biden.txt.1 :
## "Chief Justice Roberts, Vice President Harris, Speaker Pelosi..."
##
## 2021-Biden.txt.2 :
## "This is America's day."
##
## 2021-Biden.txt.3 :
## "This is democracy's day."
##
## 2021-Biden.txt.4 :
## "A day of history and hope."
##
## 2021-Biden.txt.5 :
## "Of renewal and resolve."
##
## 2021-Biden.txt.6 :
## "Through a crucible for the ages America has been tested anew..."
##
## [ reached max_ndoc ... 210 more documents ]
## 3rd step: within-sentence word tokenization
# one chain: split every sentence into word tokens (dropping punctuation),
# lower-case the tokens, then strip the English stopwords
sentence_toks <- sentences %>%
  tokens(what = 'word', remove_punct = TRUE) %>%
  tokens_tolower() %>%
  tokens_select(pattern = stopwords("en"), selection = "remove")
## select a sentiment dictionary
## we use the Proksch et al. (2015) dictionary native to quanteda
## NOTE(review): the object name suggests the Lexicoder Sentiment Dictionary (2015) -- confirm the citation
data_dictionary_LSD2015
## Dictionary object with 4 key entries.
## - [negative]:
## - a lie, abandon*, abas*, abattoir*, abdicat*, aberra*, abhor*, abject*, abnormal*, abolish*, abominab*, abominat*, abrasiv*, absent*, abstrus*, absurd*, abus*, accident*, accost*, accursed* [ ... and 2,838 more ]
## - [positive]:
## - ability*, abound*, absolv*, absorbent*, absorption*, abundanc*, abundant*, acced*, accentuat*, accept*, accessib*, acclaim*, acclamation*, accolad*, accommodat*, accomplish*, accord, accordan*, accorded*, accords [ ... and 1,689 more ]
## - [neg_positive]:
## - best not, better not, no damag*, no no, not ability*, not able, not abound*, not absolv*, not absorbent*, not absorption*, not abundanc*, not abundant*, not acced*, not accentuat*, not accept*, not accessib*, not acclaim*, not acclamation*, not accolad*, not accommodat* [ ... and 1,701 more ]
## - [neg_negative]:
## - not a lie, not abandon*, not abas*, not abattoir*, not abdicat*, not aberra*, not abhor*, not abject*, not abnormal*, not abolish*, not abominab*, not abominat*, not abrasiv*, not absent*, not abstrus*, not absurd*, not abus*, not accident*, not accost*, not accursed* [ ... and 2,840 more ]
## apply the dictionary to Biden's speech
# only the first two dictionary keys are used for the lookup
toks_lsd <- sentence_toks %>%
  tokens_lookup(dictionary = data_dictionary_LSD2015[1:2])
# turn the looked-up tokens into a document-feature matrix (one row per sentence)
dfm_lsd <- toks_lsd %>%
  dfm()
## compute the share of each sentiment category per sentence
## over the course of the speech
# convert the dfm to a data.frame that carries a `doc_id` column
df_lsd <- convert(dfm_lsd, to = "data.frame")
df_lsd
# melt to a long table: one row per sentence (doc_id) and sentiment category.
# The result must be assigned back to `df_lsd`, because the steps below rely
# on the `sentiment` and `n` columns created here.
df_lsd <- melt(df_lsd, id.vars = 'doc_id', variable.name = 'sentiment', value.name = 'n')
head(df_lsd)
# group by sentence (doc_id) and compute percentages
# (when a sentence has no dictionary hit at all, sum(n) is 0 and perc is NaN)
df_lsd <- df_lsd %>%
  group_by(doc_id) %>%
  mutate(perc = n/sum(n))
head(df_lsd)
# give every sentence a numeric value corresponding to doc ID:
# the regex extracts the first run of digits that directly follows a dot,
# e.g. the trailing "1" in "2021-Biden.txt.1"
df_lsd <- df_lsd %>%
  ungroup %>%
  mutate(num_id = as.numeric(stri_extract(doc_id, regex = '(?<=\\.)[0-9]+')))
## plot results
# smoothed share of each sentiment category along the sentence index
df_lsd %>%
  ggplot(aes(x = num_id, y = perc, colour = sentiment, group = sentiment)) +
  geom_smooth() +
  #geom_point(alpha = 0.5) +
  labs(
    title = "Biden's 2021 speech: sentiment per sentence, smoothed",
    x = 'Sentence Number Within Speech',
    y = 'Frequency'
  ) +
  # y axis is labelled in percent; both axes get (almost) no padding
  scale_y_continuous(labels = scales::percent, expand = c(0.01, 0.01)) +
  scale_x_continuous(expand = c(0, 0)) +
  # the complete theme comes first so the title tweak is applied on top of it
  theme_classic() +
  theme(plot.title = element_text(face = 'bold'))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## inspect sentence-tokens vectors 55-65
## (elements 55 to 65 of the tokenized sentences)
sentence_toks[55:65]
## Tokens consisting of 11 documents and 4 docvars.
## 2021-Biden.txt.1 :
## [1] "can" "right" "wrongs"
##
## 2021-Biden.txt.2 :
## [1] "can" "put" "people" "work" "good" "jobs"
##
## 2021-Biden.txt.3 :
## [1] "can" "teach" "children" "safe" "schools"
##
## 2021-Biden.txt.4 :
## [1] "can" "overcome" "deadly" "virus"
##
## 2021-Biden.txt.5 :
## [1] "can" "reward" "work" "rebuild" "middle" "class" "make" "health" "care" "secure"
##
## 2021-Biden.txt.6 :
## [1] "can" "deliver" "racial" "justice"
##
## [ reached max_ndoc ... 5 more documents ]
## uh.. this doesn't look like it's meant negatively.
## let's double check by reading the untokenized sentences
## (same index range, taken from the sentence-level corpus)
sentences[55:65]
## Corpus consisting of 11 documents and 4 docvars.
## 2021-Biden.txt.1 :
## "We can right wrongs."
##
## 2021-Biden.txt.2 :
## "We can put people to work in good jobs."
##
## 2021-Biden.txt.3 :
## "We can teach our children in safe schools."
##
## 2021-Biden.txt.4 :
## "We can overcome this deadly virus."
##
## 2021-Biden.txt.5 :
## "We can reward work, rebuild the middle class, and make healt..."
##
## 2021-Biden.txt.6 :
## "We can deliver racial justice."
##
## [ reached max_ndoc ... 5 more documents ]
## as expected, Biden is mentioning ISSUES, but in a combative way.
## In certain situations, we would like to analyze the embedding of certain words. For this, we need to adjust the level of analysis accordingly.
## tokenization
# word tokens without punctuation and symbols; no padding for removed tokens
toks <- tokens(df, remove_punct = TRUE, remove_symbols = TRUE, padding = FALSE)
# map the token "US" to "USA" before lower-casing -- presumably so that it is
# not conflated with the pronoun "us", which is removed as a custom stopword below.
# tokens_replace() matches whole tokens and defaults to case-insensitive glob
# patterns, so a regex such as '\\bUS\\b' never matches; the pattern therefore
# has to be the exact token, matched case-sensitively.
toks <- tokens_replace(toks,
                       pattern = 'US',
                       replacement = 'USA',
                       valuetype = 'fixed',
                       case_insensitive = FALSE)
toks <- tokens_tolower(toks)
## lemmatizing
# replace every token listed in lexicon::hash_lemmas by its lemma
toks <- tokens_replace(toks,
                       pattern = lexicon::hash_lemmas$token,
                       replacement = lexicon::hash_lemmas$lemma)
## remove stopwords
# custom stopwords
cstmwrds <- c('upon', 'can', 'us', 'let', 'may', 'make',
              'must', 'many', 'shall', 'without', 'among',
              'much', 'every', 'ever', 'know', 'new', 'never',
              'year', 'find', 'see')
# remove them all (built-in English stopwords plus the custom list)
toks <- tokens_select(toks, pattern = c(stopwords("en"), cstmwrds), selection = "remove")
## inspect topfeatures to select interesting words
# rebuild the DFM from the cleaned tokens and list its 50 most frequent features
dfx <- toks %>%
  dfm()
topfeatures(dfx, n = 50)
## people government nation good state great country power world one time citizen right law peace american now man public unite constitution duty war america interest free union life freedom hope give national principle work justice spirit liberty purpose take high come long congress just need party little part place call
## 623 615 504 474 450 439 345 342 321 277 271 264 260 259 258 255 229 226 225 224 213 212 203 202 197 190 190 188 187 167 166 158 155 153 142 142 137 136 135 134 134 131 130 128 127 125 122 122 121 118
## defining interesting words
query <- c('progress', 'spirit', 'world', 'nation', 'duty', 'war')
## feature co-occurence matrix
# one network plot per party/query-word pair, stored in `container`
# under the name "<party>: <word>"
container <- list()
for (party_name in c('Democratic', 'Republican')) {
  # tokens of the current party's speeches; the same for every query word
  party_toks <- tokens_subset(toks, Party == party_name)
  # edge colour used for all networks of this party
  if (party_name == 'Republican') {
    party_col <- 'firebrick'
  } else {
    party_col <- 'dodgerblue'
  }
  for (term in query) {
    plot_name <- paste0(party_name, ': ', term)
    # keep the query word together with a window of 10 tokens around it
    term_toks <- tokens_select(party_toks, pattern = term, selection = "keep",
                               window = 10, padding = FALSE, verbose = TRUE)
    # weighted co-occurrence counts within a window of 5 tokens
    cooc <- fcm(term_toks, context = 'window', window = 5,
                count = 'weighted', tri = FALSE)
    # restrict the matrix to its 51 top features
    top_feats <- names(topfeatures(cooc, 51))
    cooc_top <- fcm_select(cooc, pattern = top_feats, selection = "keep")
    ## plot
    # row sums relative to the smallest row sum drive label and vertex sizes
    rel_weight <- rowSums(cooc_top) / min(rowSums(cooc_top))
    label_sizes <- rel_weight * 0.8
    # the query word itself gets a tiny label
    label_sizes[term] <- 0.1
    # fixed seed before every plot so each layout is reproducible
    set.seed(123)
    net <- quanteda.textplots::textplot_network(
      cooc_top,
      min_freq = 0.5,
      edge_alpha = 0.2,
      vertex_size = rel_weight / 8,
      vertex_labelsize = label_sizes,
      edge_color = party_col
    )
    container[[plot_name]] <- net +
      labs(title = plot_name) +
      theme(plot.title = element_text(face = 'bold'))
  }
}
## kept 1 feature
## kept 1 feature
## kept 1 feature
## kept 1 feature
## kept 1 feature
## kept 1 feature
## kept 1 feature
## kept 1 feature
## kept 1 feature
## kept 1 feature
## kept 1 feature
## kept 1 feature
# list the stored plots: entries 1-6 are the Democratic ones, 7-12 the Republican ones
names(container)
## [1] "Democratic: progress" "Democratic: spirit" "Democratic: world" "Democratic: nation" "Democratic: duty" "Democratic: war" "Republican: progress" "Republican: spirit" "Republican: world" "Republican: nation" "Republican: duty" "Republican: war"
# arrange the plots in two columns; each pair of arguments puts the
# Democratic and the Republican network of the same query word side by side
grid.arrange(container[[1]], container[[7]],
             container[[2]], container[[8]],
             container[[3]], container[[9]],
             container[[4]], container[[10]],
             container[[5]], container[[11]],
             container[[6]], container[[12]],
             ncol = 2)
## A work by Lucien Baumgartner & Kevin Reuter